static grant_ref_t gref_head, gref_terminal;
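+/* Upper bound on in-flight segments: every ring slot may carry a
+ * request with the maximum number of segments. */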
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
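+/* frame_and_sects packs (ref << 16) | (fsect << 3) | lsect, so bit 15 is
+ * unused; borrow it to mark grant references invalidated by suspend. */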
+#define GRANTREF_INVALID (1<<15)
#endif
-unsigned long rec_ring_free;
-blkif_request_t rec_ring[BLK_RING_SIZE];
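+/* Shadow of one ring request: a pickled copy of the request (so it can
+ * be reissued after recovery), the guest request it belongs to, and the
+ * machine frame of each segment (needed to re-grant after a resume). */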
+static struct blk_shadow {
+ blkif_request_t req;
+ unsigned long request;
+ unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} blk_shadow[BLK_RING_SIZE];
+unsigned long blk_shadow_free;
-static int recovery = 0; /* "Recovery in progress" flag. Protected
- * by the blkif_io_lock */
+static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
static void kick_pending_request_queues(void);
int __init xlblk_init(void);
-void blkif_completion( blkif_request_t *req );
+static void blkif_completion(struct blk_shadow *s);
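+/* Unused shadow entries are chained into a free list through req.id;
+ * the chain ends with the out-of-range marker 0x0fffffff. */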
-static inline int GET_ID_FROM_FREELIST( void )
+static inline int GET_ID_FROM_FREELIST(void)
{
- unsigned long free = rec_ring_free;
-
+ unsigned long free = blk_shadow_free;
- BUG_ON(free > BLK_RING_SIZE);
+ BUG_ON(free >= BLK_RING_SIZE);
-
- rec_ring_free = rec_ring[free].id;
-
- rec_ring[free].id = 0x0fffffee; /* debug */
-
+ blk_shadow_free = blk_shadow[free].req.id;
+ blk_shadow[free].req.id = 0x0fffffee; /* debug */
return free;
}
-static inline void ADD_ID_TO_FREELIST( unsigned long id )
+static inline void ADD_ID_TO_FREELIST(unsigned long id)
{
- rec_ring[id].id = rec_ring_free;
- rec_ring_free = id;
+ blk_shadow[id].req.id = blk_shadow_free;
+ blk_shadow[id].request = 0;
+ blk_shadow_free = id;
}
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
#endif
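+/* Pickle a ring request into its shadow entry. Without grant tables the
+ * segments hold machine frames, which do not survive migration, so they
+ * are stored as pseudo-physical frames instead. */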
-static inline void translate_req_to_pfn(blkif_request_t *xreq,
- blkif_request_t *req)
+static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
{
+#ifndef CONFIG_XEN_BLKDEV_GRANT
int i;
+#endif
- xreq->operation = req->operation;
- xreq->nr_segments = req->nr_segments;
- xreq->device = req->device;
- /* preserve id */
- xreq->sector_number = req->sector_number;
+ s->req = *r;
- for ( i = 0; i < req->nr_segments; i++ )
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
- xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+ for ( i = 0; i < r->nr_segments; i++ )
+ s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]);
#endif
}
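+/* Unpickle a shadow entry back into a ring slot, converting the stored
+ * pseudo-physical frames back to machine frames in the non-grant case. */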
-static inline void translate_req_to_mfn(blkif_request_t *xreq,
- blkif_request_t *req)
+static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
{
+#ifndef CONFIG_XEN_BLKDEV_GRANT
int i;
+#endif
- xreq->operation = req->operation;
- xreq->nr_segments = req->nr_segments;
- xreq->device = req->device;
- xreq->id = req->id; /* copy id (unlike above) */
- xreq->sector_number = req->sector_number;
+ *r = s->req;
- for ( i = 0; i < req->nr_segments; i++ )
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
- xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+ for ( i = 0; i < s->req.nr_segments; i++ )
+ r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]);
#endif
}
-
-
/************************** KERNEL VERSION 2.6 **************************/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
static void kick_pending_request_queues(void)
{
-
if ( (xlbd_blk_queue != NULL) &&
test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
{
*/
xlbd_blk_queue->request_fn(xlbd_blk_queue);
}
-
}
* When usage drops to zero it may allow more VBD updates to occur.
* Update of usage count is protected by a per-device semaphore.
*/
- if (--di->mi->usage == 0) {
+ if ( --di->mi->usage == 0 )
vbd_update();
- }
return 0;
}
DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
command, (long)argument, inode->i_rdev);
- switch (command) {
-
+ switch ( command )
+ {
case HDIO_GETGEO:
/* return ENOSYS to use defaults */
return -ENOSYS;
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST();
- rec_ring[id].id = (unsigned long) req;
+ blk_shadow[id].request = (unsigned long)req;
ring_req->id = id;
ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
buffer_ma >> PAGE_SHIFT,
rq_data_dir(req) );
+ blk_shadow[id].frame[ring_req->nr_segments] =
+ buffer_ma >> PAGE_SHIFT;
+
ring_req->frame_and_sects[ring_req->nr_segments++] =
(((u32) ref) << 16) | (fsect << 3) | lsect;
+
#else
ring_req->frame_and_sects[ring_req->nr_segments++] =
buffer_ma | (fsect << 3) | lsect;
blk_ring.req_prod_pvt++;
/* Keep a private copy so we can reissue requests when recovering. */
- translate_req_to_pfn(&rec_ring[id], ring_req);
+ pickle_request(&blk_shadow[id], ring_req);
return 0;
}
queued = 0;
- while ((req = elv_next_request(rq)) != NULL) {
- if (!blk_fs_request(req)) {
+ while ( (req = elv_next_request(rq)) != NULL )
+ {
+ if ( !blk_fs_request(req) )
+ {
end_request(req, 0);
continue;
}
blk_stop_queue(rq);
break;
}
+
DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
req, req->cmd, req->sector, req->current_nr_sectors,
req->nr_sectors, req->buffer,
rq_data_dir(req) ? "write" : "read");
+
blkdev_dequeue_request(req);
- if (blkif_queue_request(req)) {
+ if ( blkif_queue_request(req) )
+ {
blk_stop_queue(rq);
break;
}
+
queued++;
}
- if (queued != 0)
+ if ( queued != 0 )
flush_requests();
}
unsigned long id;
bret = RING_GET_RESPONSE(&blk_ring, i);
- id = bret->id;
- req = (struct request *)rec_ring[id].id;
- blkif_completion( &rec_ring[id] );
+ id = bret->id;
+ req = (struct request *)blk_shadow[id].request;
- ADD_ID_TO_FREELIST(id); /* overwrites req */
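+ /* Complete (releasing any grant references) before
+  * ADD_ID_TO_FREELIST() recycles the shadow entry. */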
+ blkif_completion(&blk_shadow[id]);
+
+ ADD_ID_TO_FREELIST(id);
switch ( bret->operation )
{
if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
DPRINTK("Bad return from blkdev data request: %x\n",
bret->status);
-
+
if ( unlikely(end_that_request_first
(req,
(bret->status == BLKIF_RSP_OKAY),
blk_ring.req_prod_pvt - 1);
bh = (struct buffer_head *)id;
- bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
-
- rec_ring[req->id].id = id;
-
+ bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
+ blk_shadow[req->id].request = (unsigned long)id;
+
#ifdef CONFIG_XEN_BLKDEV_GRANT
/* install a grant reference. */
ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
buffer_ma >> PAGE_SHIFT,
( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
+ blk_shadow[id].frame[req->nr_segments] =
+ buffer_ma >> PAGE_SHIFT;
+
req->frame_and_sects[req->nr_segments] =
(((u32) ref ) << 16) | (fsect << 3) | lsect;
#else
DISABLE_SCATTERGATHER();
/* Update the copy of the request in the recovery ring. */
- translate_req_to_pfn(&rec_ring[req->id], req );
+ pickle_request(&blk_shadow[req->id], req);
return 0;
}
req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
xid = GET_ID_FROM_FREELIST();
- rec_ring[xid].id = id;
+ blk_shadow[xid].request = (unsigned long)id;
req->id = xid;
req->operation = operation;
buffer_ma >> PAGE_SHIFT,
( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
+ blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
+
req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect;
#else
req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
#endif
/* Keep a private copy so we can reissue requests when recovering. */
- translate_req_to_pfn(&rec_ring[xid], req );
+ pickle_request(&blk_shadow[xid], req);
blk_ring.req_prod_pvt++;
bret = RING_GET_RESPONSE(&blk_ring, i);
id = bret->id;
- bh = (struct buffer_head *)rec_ring[id].id;
+ bh = (struct buffer_head *)blk_shadow[id].request;
- blkif_completion( &rec_ring[id] );
+ blkif_completion(&blk_shadow[id]);
ADD_ID_TO_FREELIST(id);
id = GET_ID_FROM_FREELIST();
req_d->id = id;
- rec_ring[id].id = (unsigned long) req;
+ blk_shadow[id].request = (unsigned long)req;
- translate_req_to_pfn( &rec_ring[id], req );
+ pickle_request(&blk_shadow[id], req);
blk_ring.req_prod_pvt++;
flush_requests();
{
int i;
blkif_request_t *req;
+ struct blk_shadow *copy;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int j;
+#endif
+
+ /* Stage 1: Make a safe copy of the shadow state. Stage 2 resets
+  * blk_shadow to rebuild the free list, so pending requests must be
+  * preserved elsewhere first. */
+ copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
+ BUG_ON(copy == NULL);
+ memcpy(copy, blk_shadow, sizeof(blk_shadow));
- /* Hmm, requests might be re-ordered when we re-issue them.
- * This will need to be fixed once we have barriers */
+ /* Stage 2: Set up free list. */
+ memset(&blk_shadow, 0, sizeof(blk_shadow));
+ for ( i = 0; i < BLK_RING_SIZE; i++ )
+ blk_shadow[i].req.id = i+1;
+ blk_shadow_free = blk_ring.req_prod_pvt;
+ blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
- /* Stage 1 : Find active and move to safety. */
+ /* Stage 3: Find pending requests and requeue them. */
for ( i = 0; i < BLK_RING_SIZE; i++ )
{
- if ( rec_ring[i].id >= PAGE_OFFSET )
+ /* Not in use? */
+ if ( copy[i].request == 0 )
+ continue;
+
+ /* Grab a request slot and unpickle shadow state into it. */
+ req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ unpickle_request(req, &copy[i]);
+
+ /* We get a new request id, and must reset the shadow state. */
+ req->id = GET_ID_FROM_FREELIST();
+ memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i]));
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* Rewrite any grant references invalidated by suspend/resume. */
+ for ( j = 0; j < req->nr_segments; j++ )
{
- req = RING_GET_REQUEST(&blk_ring,
- blk_ring.req_prod_pvt);
- translate_req_to_mfn(req, &rec_ring[i]);
- blk_ring.req_prod_pvt++;
+ if ( req->frame_and_sects[j] & GRANTREF_INVALID )
+ gnttab_grant_foreign_access_ref(
+ blkif_gref_from_fas(req->frame_and_sects[j]),
+ rdomid,
+ blk_shadow[req->id].frame[j],
+ rq_data_dir((struct request *)
+ blk_shadow[req->id].request));
+ req->frame_and_sects[j] &= ~GRANTREF_INVALID;
}
- }
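+ /* Refresh the pickled copy now that its grant references are valid. */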
+ blk_shadow[req->id].req = *req;
+#endif
- /* Stage 2 : Set up shadow list. */
- for ( i = 0; i < blk_ring.req_prod_pvt; i++ )
- {
- req = RING_GET_REQUEST(&blk_ring, i);
- rec_ring[i].id = req->id;
- req->id = i;
- translate_req_to_pfn(&rec_ring[i], req);
+ blk_ring.req_prod_pvt++;
}
- /* Stage 3 : Set up free list. */
- for ( ; i < BLK_RING_SIZE; i++ )
- rec_ring[i].id = i+1;
- rec_ring_free = blk_ring.req_prod_pvt;
- rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
-
- /* blk_ring->req_prod will be set when we flush_requests().*/
- wmb();
+ kfree(copy);
- /* Switch off recovery mode, using a memory barrier to ensure that
- * it's seen before we flush requests - we don't want to miss any
- * interrupts. */
recovery = 0;
+
+ /* blk_ring->req_prod will be set when we flush_requests(). */
wmb();
/* Kicks things back into life. */
flush_requests();
- /* Now safe to left other peope use interface. */
+ /* Now safe to let other people use the interface. */
blkif_state = BLKIF_STATE_CONNECTED;
}
printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");
- rec_ring_free = 0;
+ blk_shadow_free = 0;
+ memset(blk_shadow, 0, sizeof(blk_shadow));
for ( i = 0; i < BLK_RING_SIZE; i++ )
- rec_ring[i].id = i+1;
- rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
+ blk_shadow[i].req.id = i+1;
+ blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
(void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
CALLBACK_IN_BLOCKING_CONTEXT);
void blkdev_resume(void)
{
+#ifdef CONFIG_XEN_BLKDEV_GRANT
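+ /* Grant references do not survive a suspend: mark every pickled
+  * segment stale so that recovery re-grants it before reissue. */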
+ int i, j;
+ for ( i = 0; i < BLK_RING_SIZE; i++ )
+ for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
+ blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
+#endif
send_driver_status(1);
}
-void blkif_completion(blkif_request_t *req)
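+/* Per-request completion: release the per-segment grant references, or,
+ * without grant tables, propagate machine-to-physical updates for pages
+ * just filled by a read so dirty logging sees them. */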
+static void blkif_completion(struct blk_shadow *s)
{
int i;
#ifdef CONFIG_XEN_BLKDEV_GRANT
- grant_ref_t gref;
-
- for ( i = 0; i < req->nr_segments; i++ )
- {
- gref = blkif_gref_from_fas(req->frame_and_sects[i]);
- gnttab_release_grant_reference(&gref_head, gref);
- }
+ for ( i = 0; i < s->req.nr_segments; i++ )
+ gnttab_release_grant_reference(
+ &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
#else
/* This is a hack to get the dirty logging bits set */
- switch ( req->operation )
+ if ( s->req.operation == BLKIF_OP_READ )
{
- case BLKIF_OP_READ:
- for ( i = 0; i < req->nr_segments; i++ )
+ for ( i = 0; i < s->req.nr_segments; i++ )
{
- unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
+ unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT;
unsigned long mfn = phys_to_machine_mapping[pfn];
xen_machphys_update(mfn, pfn);
}
- break;
}
#endif
}